Celestin Apprentice 5

home *** CD-ROM | disk | FTP | other *** search

/ Celestin Apprentice 5 / Apprentice-Release5.iso / Source Code / Libraries / Advanced I⁄O v2.3 / Advanced i⁄o / arithm_modadh.cc < prev next >

Wrap

Text File | 1995-07-08 | 10.2 KB | 328 lines | [TEXT/ALFA]

// This may look like C code, but it is really -*- C++ -*- /* ************************************************************************ * * Arithmetic Coding * Adaptive Histogram-based model for the data source * * * The present file provides the functionality for both building the * histogram for a sequence of integers items and supplying the * arithmetic codec with probabilities of the items estimated from * the histogram. * * The program uses the histogram to find out which symbols shall really * come out and how frequent. It allocates frequency tables only for * the symbols that really rather than potentially need to be coded. * In all other respects (assuming Lorentzian-like initial probability * distribution and tailoring as more symbols become known) the * model behaves way the same as the simple AdaptiveModel does. * * $Id: arithm_modadh.cc,v 2.0 1995/02/07 19:37:03 oleg Exp oleg $ * ************************************************************************ */ #ifdef __GNUC__ #pragma implementation "arithm_modadh.h" #endif #include "arithm_modadh.h" #include "std.h" /* *======================================================================== * Histogram-based Adaptive model for the data source */ /* *------------------------------------------------------------------------ * Constructors and Destructors */ // Constructor AdaptiveHistModel::AdaptiveHistModel(const Histogram& histogram) : no_distinct_symbols(histogram.no_distinct_symbols()), update_inc(0) { // Skip leading and tailing dummy symbols // (never occured in the histogram) for(symbol_lwb=histogram.symbol_lwb; symbol_lwb < histogram.symbol_upb && histogram.get(symbol_lwb) == 0; symbol_lwb++) ; for(symbol_upb=histogram.symbol_upb; symbol_upb > symbol_lwb && histogram.get(symbol_upb) == 0; symbol_upb--) ; assure( symbol_upb > symbol_lwb, "there should be at least 2 symbols to code" ); no_potential_symbols = symbol_upb - symbol_lwb + 1; assert( no_potential_symbols >= no_distinct_symbols ); allocate_model(no_distinct_symbols); symbol_to_index = new Index[no_potential_symbols]; index_to_symbol = new Symbol[no_distinct_symbols]; initialize_model(histogram); } // Constructor for a dummy model. The real // model contents is supposed to be read // in later AdaptiveHistModel::AdaptiveHistModel(void) : symbol_lwb(0), symbol_upb(0), no_potential_symbols(0), no_distinct_symbols(0), update_inc(0) { symbol_to_index = 0; index_to_symbol = 0; } // Destructor AdaptiveHistModel::~AdaptiveHistModel(void) { assert( symbol_to_index != 0 && index_to_symbol != 0 ); delete [] symbol_to_index; delete [] index_to_symbol; } /* *------------------------------------------------------------------------ * Sort the histogram and set up the model */ static struct Hist_elem { unsigned int count; Symbol symbol; } * Hist_sorted; // Comparison routine to sort the Histogram // in the descending order static int hist_compar(const void * eli, const void * elj) { if( ((const Hist_elem*)eli)->count < ((const Hist_elem*)elj)->count ) return 1; else if( ((const Hist_elem*)eli)->count == ((const Hist_elem*)elj)->count ) return 0; else return -1; } void AdaptiveHistModel::initialize_model(const Histogram& histogram) { Hist_sorted = new Hist_elem[no_distinct_symbols]; register Symbol symbol; register int i; for(symbol=histogram.symbol_lwb,i=0; symbol<=histogram.symbol_upb; symbol++) { unsigned int count = histogram.get(symbol); if( count != 0 ) Hist_sorted[i].count = count, Hist_sorted[i].symbol = symbol, i++; } assert( i == no_distinct_symbols ); qsort(Hist_sorted,no_distinct_symbols,sizeof(Hist_sorted[0]),hist_compar); memset(symbol_to_index,0,sizeof(symbol_to_index[0])*no_potential_symbols); for(i=0; i<no_distinct_symbols; i++) { Index index = i+1; Symbol symbol = Hist_sorted[i].symbol; assert( symbol <= symbol_upb && symbol >= symbol_lwb ); symbol_to_index[symbol-symbol_lwb] = index; index_to_symbol[index-1] = symbol; } assert( i<EOF_index ); initial_distribution(); } // Assign initial frequency counts // according to the Lorentzian ditribution void AdaptiveHistModel::initial_distribution(void) { register int i; cum_frequencies[no_indices] = 0; // update_inc > 1 means the real // occurence of a symbol means more update_inc = no_indices/10 + 1; // than just a potential occurence. // Since all frequencies must be >0, // frequency count one can be regarded // as being "infinitely small" // The folowing distribution is // certaily ad hoc, but seems to work for(i=no_indices; i>0; i--) { frequencies[i] = ( i <= 2 ? 45*update_inc-30 : i <= 12 ? 3*update_inc : 1 ); cum_frequencies[i-1] = cum_frequencies[i] + frequencies[i]; } frequencies[0] = 0; // Must be zero (an impossible value) } /* *------------------------------------------------------------------------ * Searching for index/symbol */ // Return the index of a given symbol Index AdaptiveHistModel::get_index(const Symbol symbol) const { if( symbol < symbol_lwb || symbol > symbol_upb ) _error("Symbol %d is out of interval [%d,%d]", symbol, symbol_lwb, symbol_upb); Index index = symbol_to_index[symbol-symbol_lwb]; if( index == 0 ) _error("\nSymbol %d has not been counted when Histogram was constructed", symbol); return index; } // Return the symbol for a given index Symbol AdaptiveHistModel::get_symbol(const Index index) const { if( index < 1 || index > no_indices || index == EOF_index ) _error("Index %d is out of boundaries [1,%d)",index,no_indices); return index_to_symbol[index-1]; } /* *------------------------------------------------------------------------ * Input/Output for the frequency tables * As a matter of fact, only index_to_symbol[] table along with its size * needs to be saved. All other tables can easily be reconstructed from that. * * Thus, the format the tables are written is follows * short no_distinct_symbols * short index_to_symbol[0..no_distinct_symbols-1] * The tables entries are coded using a variable bit size algorithm, * see BitIn/BitOut for more details. */ void AdaptiveHistModel::open(BitOut& file) { file.write_short(no_distinct_symbols); for(register int i=0; i<no_distinct_symbols; i++) file.put_short(index_to_symbol[i]); } // Read in the tables and set up // the model void AdaptiveHistModel::open(BitIn& file) { no_distinct_symbols = file.read_short("Reading AdaptiveHistModel tables"); allocate_model(no_distinct_symbols); index_to_symbol = new Symbol[no_distinct_symbols]; for(register int i=0; i<no_distinct_symbols; i++) { const Symbol symbol = file.get_short(); if( i== 0 ) symbol_lwb = symbol_upb = symbol; else symbol_lwb = symbol_lwb > symbol ? symbol : symbol_lwb, symbol_upb = symbol_upb < symbol ? symbol : symbol_upb; index_to_symbol[i] = symbol; } no_potential_symbols = symbol_upb - symbol_lwb + 1; assert( no_potential_symbols > 1 ); assert( no_distinct_symbols <= no_potential_symbols ); symbol_to_index = new Index[no_potential_symbols]; memset(symbol_to_index,0,sizeof(symbol_to_index[0])*no_potential_symbols); for(register Index index=1; index<=no_distinct_symbols; index++) { Symbol symbol = index_to_symbol[index-1]; symbol_to_index[symbol-symbol_lwb] = index; } initial_distribution(); } /* *------------------------------------------------------------------------ * Update the adaptive model * to account for occurence of a particular index */ // Scale the accumulated statistics down // in anticipation of a change, or simply to // keep counters from overflow void AdaptiveHistModel::scale_down_past(void) { register int cum = 0; // If so, halve all the counts for(register int i=no_indices; i>=0; i--) { // keeping them nonzero, and cum_frequencies[i] = cum; // recompute the cumulative counts cum += (frequencies[i] = (frequencies[i]+1) >> 1); } } void AdaptiveHistModel::update_model(const Index index) { // See if freq counts near their max if( total_cum_freq() >= (Top_freq>>1) ) // /2 makes rescaling work more often scale_down_past(); // Tentatively increment the freq // count for index, just an estimate const Lword tent_freq = frequencies[index] + (update_inc > 4 ? update_inc : 1); // Try to pull 'index' to the beginning // of the freq. table, skipping symbols // with the lower freq. register Lword * fp = &frequencies[index]; while( *--fp != 0 && *fp <= tent_freq ) // Note, freq[0] is always 0 ; // Find a new position for the index const Index new_index = ++fp - frequencies; if( new_index < index ) // If the index is to move, update { // the translation tables Symbol symbol_oind = index_to_symbol[index-1]; Symbol symbol_nind = index_to_symbol[new_index-1]; index_to_symbol[index-1] = symbol_nind; index_to_symbol[new_index-1] = symbol_oind; symbol_to_index[symbol_oind-symbol_lwb] = new_index; symbol_to_index[symbol_nind-symbol_lwb] = index; // Note, we've just swapped index // and new_index. If their frequencies // differ, we need to update the // frequecies and cumulative freqs const Lword old_freq = frequencies[index]; const Lword new_freq = frequencies[new_index]; if( old_freq != new_freq ) { frequencies[index] = new_freq; frequencies[new_index] = old_freq; const long int delta = new_freq - old_freq; for(register Lword * cfp = &cum_frequencies[new_index]; cfp < &cum_frequencies[index];) *cfp++ += delta; } } frequencies[new_index] += update_inc; // Increment the freq count for index register Lword * cfp; // And update the cumulative counts for(cfp=&cum_frequencies[new_index]; cfp>&cum_frequencies[0]; ) *--cfp += update_inc; #if 0 { message("index %d, new_index %d, update_inc %d\n",index,new_index,update_inc); for(register Index i=0; i<no_indices; i++) message("%d ",frequencies[i]); message("\n"); } #endif }